set matsize 5000

capture program drop GenPolynomial
program define GenPolynomial
gen above           = totmuj>=20
gen totmuj2         = (totmuj-20)^2
gen totmuj3         = (totmuj-20)^3
gen totmuj_above    = (totmuj-20)*above
gen totmuj2_above   = (totmuj-20)^2*above
gen totmuj3_above   = (totmuj-20)^3*above
gen totmuj_original = totmuj
replace totmuj      = totmuj-20
end

capture program drop Table_Panels 
program define Table_Panels `1' `2'

local var_number = 1
local table_name "TABLES/`2'_range`1'"
local table_name = subinstr("`table_name'", " ", "", .)

foreach y in $Outcomes_List {

if `var_number' == 1 {
	esttab `y'_* ///
	   using `table_name', keep(above) stats(N, fmt(%9.0gc)) ///
	   cells(b(fmt(%9.3f) star) se(par fmt(%9.3f))) starlevels(* 0.10 ** 0.05 *** 0.01) label replace ///
	   style(tex) booktabs title("`: var label `y''")
}
else {
	esttab `y'_* ///
	   using `table_name', keep(above) stats(N, fmt(%9.0gc)) ///
	   cells(b(fmt(%9.3f) star) se(par fmt(%9.3f))) starlevels(* 0.10 ** 0.05 *** 0.01) label append ///
	   style(tex) booktabs nonumbers title("`: var label `y''")
}
local var_number = `var_number' + 1
di "`var_number'"
}

end

********************************************************************************

*******************************
**** TABLE 6 ******************
*******************************
{
foreach bw of numlist 15 10{
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

global SubSample all period1 period2 period3 sectorA sectorB small large // sectorC sectorD
global Outcomes_List mw

********************************************************************************
* Main specifications:
foreach var in $Outcomes_List{
estimates clear

matrix P = J(8,1,.)
local i = 0 
foreach sub_sample in $SubSample {
local i = `i'+1
qui areg `var' totmuj totmuj2 totmuj_above totmuj2_above above i.year, robust a(ciiu3), if abs(totmuj) <= `bw' & `sub_sample' == 1

estimates store `var'_`sub_sample'
qui estadd local SAMPLE `sub_sample'
matrix P[`i',1] = r(table)["pvalue","above"]
}



**************************
**** Sharpened q-value ***
**************************
preserve 
* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	clear
	//pause
	}	
	 

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"


local i = 0
if `i' == 0 {
	local i = `i' + 1
* Obtenemos el número de elementos en la matriz P
local n = rowsof(P) * colsof(P)

set obs `n' 
* Copiamos los p-values de la matriz P a la variable pval
forvalues i = 1/`n' {
    quietly replace pval = P[`i',1] if _n ==`i'
}
}

//pause



* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

save TABLES\sharpened_qvalue\T6_sh_qvalue_`var'_`bw'.dta, replace 
restore 
}
Table_Panels `bw' "Table6"

********************************************************************************
**rwolf2 
********************************************************************************
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

set seed 1234

global Subsamples all period1 period2 period3 small large sectorA sectorB //sectorC s
//global graph_vars kw km mwectorD // period4 sectorA_old sectorB_old
//local range = 10

local dep_var mw 


#delimit ;
rwolf2  (reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & all==1, vce(robust) )
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period1==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period2==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period3==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorA==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorB==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & small==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & large==1, vce(robust)),
		indepvars(above, above, above, above, above, above, above, above ) usevalid;	
#delimit cr 
}
}


*******************************
**** TABLE 7 ******************
*******************************
{
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

global SubSample all period1 period2 period3 sectorA sectorB small large // sectorC sectorD
global Outcomes_List kw_total kw_rep kw_nrep elecw combw 
local bw 10 

********************************************************************************
* Main specifications:
foreach var in $Outcomes_List{
*estimates clear

matrix P = J(8,1,.)
local i = 0 
foreach sub_sample in $SubSample {
local i = `i'+1
qui areg `var' totmuj totmuj2 totmuj_above totmuj2_above above i.year, robust a(ciiu3), if abs(totmuj) <= `bw' & `sub_sample' == 1
estimates store `var'_`sub_sample'
qui estadd local SAMPLE `sub_sample'
matrix P[`i',1] = r(table)["pvalue","above"]
}



**************************
**** Sharpened q-value ***
**************************
preserve 
* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	clear
	//pause
	}	
	 

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"


local i = 0
if `i' == 0 {
	local i = `i' + 1
* Obtenemos el número de elementos en la matriz P
local n = rowsof(P) * colsof(P)

set obs `n' 
* Copiamos los p-values de la matriz P a la variable pval
forvalues i = 1/`n' {
    quietly replace pval = P[`i',1] if _n ==`i'
}
}

//pause



* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

save TABLES\sharpened_qvalue\T7_sh_qvalue_`var'_`bw'.dta, replace 
restore 


********************************************************************************
**rwolf2 
********************************************************************************
preserve
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

set seed 1234

local dep_var `var' 


#delimit ;
rwolf2  (reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & all==1, vce(robust) )
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period1==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period2==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period3==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorA==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorB==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & small==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & large==1, vce(robust)),
		indepvars(above, above, above, above, above, above, above, above ) usevalid;	
#delimit cr 
restore
}

Table_Panels `bw' "Table7"
}
*******************************
**** TABLE 8 ******************
*******************************
{
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

global SubSample all period1 period2 period3 sectorA sectorB small large // sectorC sectorD
global Outcomes_List km_total km_rep km_nrep elecm combm 
local bw 10 

********************************************************************************
* Main specifications:
foreach var in $Outcomes_List{

matrix P = J(8,1,.)
local i = 0 
foreach sub_sample in $SubSample {
local i = `i'+1
qui areg `var' totmuj totmuj2 totmuj_above totmuj2_above above i.year, robust a(ciiu3), if abs(totmuj) <= `bw' & `sub_sample' == 1
estimates store `var'_`sub_sample'
qui estadd local SAMPLE `sub_sample'
matrix P[`i',1] = r(table)["pvalue","above"]

}

**************************
**** Sharpened q-value ***
**************************
preserve 
* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	clear
	//pause
	}	
	 

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"


local i = 0
if `i' == 0 {
	local i = `i' + 1
* Obtenemos el número de elementos en la matriz P
local n = rowsof(P) * colsof(P)

set obs `n' 
* Copiamos los p-values de la matriz P a la variable pval
forvalues i = 1/`n' {
    quietly replace pval = P[`i',1] if _n ==`i'
}
}

//pause



* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

save TABLES\sharpened_qvalue\T8_sh_qvalue_`var'_`bw'.dta, replace 
restore 


********************************************************************************
**rwolf2 
********************************************************************************
preserve
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

set seed 1234

local dep_var `var' 


#delimit ;
rwolf2  (reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & all==1, vce(robust) )
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period1==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period2==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period3==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorA==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorB==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & small==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & large==1, vce(robust)),
		indepvars(above, above, above, above, above, above, above, above ) usevalid;	
#delimit cr 
restore
}
Table_Panels `bw' "Table8"
}
*******************************
**** TABLE 9 *****************
*******************************
{
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

global SubSample all period1 period2 period3 sectorA sectorB small large // sectorC sectorD
global Outcomes_List hl_skills_l hl_skills_w hl_skills_m 
local bw 10 

********************************************************************************
* Main specifications:
foreach var in $Outcomes_List{
*estimates clear

matrix P = J(8,1,.)
local i = 0 
foreach sub_sample in $SubSample {
local i = `i'+1
qui areg `var' totmuj totmuj2 totmuj_above totmuj2_above above i.year, robust a(ciiu3), if abs(totmuj) <= `bw' & `sub_sample' == 1
estimates store `var'_`sub_sample'
qui estadd local SAMPLE `sub_sample'
matrix P[`i',1] = r(table)["pvalue","above"]

}


**************************
**** Sharpened q-value ***
**************************
preserve 
* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	clear
	//pause
	}	
	 

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"


local i = 0
if `i' == 0 {
	local i = `i' + 1
* Obtenemos el número de elementos en la matriz P
local n = rowsof(P) * colsof(P)

set obs `n' 
* Copiamos los p-values de la matriz P a la variable pval
forvalues i = 1/`n' {
    quietly replace pval = P[`i',1] if _n ==`i'
}
}

//pause



* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

save TABLES\sharpened_qvalue\T9_sh_qvalue_`var'_`bw'.dta, replace 
restore 


********************************************************************************
**rwolf2 
********************************************************************************
preserve
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

set seed 1234

local dep_var `var' 


#delimit ;
rwolf2  (reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & all==1, vce(robust) )
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period1==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period2==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period3==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorA==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorB==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & small==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & large==1, vce(robust)),
		indepvars(above, above, above, above, above, above, above, above ) usevalid;	
#delimit cr 
restore
}
Table_Panels `bw' "Table9"

}
*******************************
**** TABLE 10 *****************
*******************************
{
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

global SubSample all period1 period2 period3 sectorA sectorB small large // sectorC sectorD
global Outcomes_List wage yl ing_l cost_l
local bw 10 

********************************************************************************
* Main specifications:
foreach var in $Outcomes_List{
*estimates clear

matrix P = J(8,1,.)
local i = 0 
foreach sub_sample in $SubSample {
local i = `i'+1
qui areg `var' totmuj totmuj2 totmuj_above totmuj2_above above i.year, robust a(ciiu3), if abs(totmuj) <= `bw' & `sub_sample' == 1
estimates store `var'_`sub_sample'
qui estadd local SAMPLE `sub_sample'
matrix P[`i',1] = r(table)["pvalue","above"]

}

**************************
**** Sharpened q-value ***
**************************
preserve 
* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	clear
	//pause
	}	
	 

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"


local i = 0
if `i' == 0 {
	local i = `i' + 1
* Obtenemos el número de elementos en la matriz P
local n = rowsof(P) * colsof(P)

set obs `n' 
* Copiamos los p-values de la matriz P a la variable pval
forvalues i = 1/`n' {
    quietly replace pval = P[`i',1] if _n ==`i'
}
}

//pause



* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

save TABLES\sharpened_qvalue\T10_sh_qvalue_`var'_`bw'.dta, replace 
restore 


********************************************************************************
**rwolf2 
********************************************************************************
preserve
use DTA/Datos_ENIA_95_07_Processed.dta, clear
GenPolynomial

set seed 1234

local dep_var `var' 


#delimit ;
rwolf2  (reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & all==1, vce(robust) )
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period1==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period2==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & period3==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorA==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & sectorB==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & small==1, vce(robust))
		(reg `dep_var' totmuj totmuj2 totmuj_above totmuj2_above above i.ciiu3 i.year if abs(totmuj) <= `bw' & large==1, vce(robust)),
		indepvars(above, above, above, above, above, above, above, above ) usevalid;	
#delimit cr 
restore
}

Table_Panels `bw' "Table10"
}
